In [1]:
%matplotlib inline
# General
from __future__ import unicode_literals
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import numpy as np
import pandas as pd
# Logger
import logging
logging.basicConfig(level=logging.WARN)
# ALCS + custom environments
import sys, os
sys.path.append(os.path.abspath('../../..'))
# Enable automatic module reload
%load_ext autoreload
%autoreload 2
# Load PyALCS module
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList
from lcs.metrics import population_metrics
# Load environments
import gym
import gym_multiplexer
In [2]:
# Create the 20-bit boolean multiplexer environment (the id is presumably
# registered with gym by the `gym_multiplexer` import above - confirm) and
# keep whatever `reset()` returns (the starting observation in the classic
# gym API).
mp = gym.make('boolean-multiplexer-20bit-v0')
situation = mp.reset()
# render phenotype
mp.render()
In [3]:
# perform random action
# One action is sampled from the environment's action space and applied; the
# step result is unpacked as a 4-tuple whose last element is discarded.
state, reward, done, _ = mp.step(mp.action_space.sample())
print(f"New state: {state}, reward: {reward}, is done: {done}")
In [4]:
class MultiplexerAdapter(EnvironmentAdapter):
    """Environment adapter that stringifies multiplexer observations."""

    @staticmethod
    def to_genotype(phenotype):
        """Return a list holding ``str()`` of each element of ``phenotype``.

        The order of the elements is preserved.
        """
        return list(map(str, phenotype))
In [5]:
# Convert the last observed `state` into a genotype (a list of strings, one
# per element) and display it joined into a single string.
genotype = MultiplexerAdapter().to_genotype(state)
''.join(genotype)
Out[5]:
In [6]:
def _get_mp_actors(bits):
    """Build an ACS2 agent together with its boolean multiplexer environment.

    Shared implementation behind the ``get_*bit_mp_actors`` helpers, which
    only differ in the environment size.

    Parameters
    ----------
    bits : int
        Multiplexer size; used to build the gym environment id
        ``boolean-multiplexer-{bits}bit-v0``.

    Returns
    -------
    tuple
        ``(agent, env)``: a freshly configured ``ACS2`` instance and the gym
        environment it was configured for.
    """
    mp = gym.make(f'boolean-multiplexer-{bits}bit-v0')
    cfg = Configuration(
        # Size of the wrapped env's observation space and the literal 2
        # (presumably classifier length and number of possible actions -
        # confirm against the PyALCS Configuration signature).
        mp.env.observation_space.n, 2,
        environment_adapter=MultiplexerAdapter(),
        user_metrics_collector_fcn=population_metrics,
        do_ga=True)
    return ACS2(cfg), mp


def get_6bit_mp_actors():
    """Return ``(agent, env)`` for the 6-bit boolean multiplexer."""
    return _get_mp_actors(6)


def get_11bit_mp_actors():
    """Return ``(agent, env)`` for the 11-bit boolean multiplexer."""
    return _get_mp_actors(11)


def get_20bit_mp_actors():
    """Return ``(agent, env)`` for the 20-bit boolean multiplexer."""
    return _get_mp_actors(20)
In [7]:
def perform_experiment(agent, env, trials=250_000):
    """Run an explore/exploit experiment and print a short summary.

    Parameters
    ----------
    agent : object
        Anything exposing ``explore_exploit(env, trials)`` that returns a
        ``(population, metrics)`` pair.
    env : object
        Environment handed to the agent unchanged.
    trials : int, optional
        Number of trials forwarded to ``explore_exploit``.

    Returns
    -------
    tuple
        The ``(population, metrics)`` pair exactly as returned by the agent.

    Notes
    -----
    Prints the ``'population'`` and ``'reliable'`` entries of the last metrics
    record, the record itself, and up to ten reliable classifiers ordered by
    descending fitness.
    """
    population, metrics = agent.explore_exploit(env, trials)

    # The last metrics record can only be summarised if at least one record
    # was collected; indexing an empty sequence would raise IndexError.
    if len(metrics) > 0:
        last = metrics[-1]
        print(f"Population size: {last['population']}")
        print(f"Reliable size: {last['reliable']}")
        print(last)
    else:
        print("No metrics were collected.")

    # Stable descending sort: classifiers with equal fitness keep their
    # population order.
    reliable_classifiers = sorted(
        (cl for cl in population if cl.is_reliable()),
        key=lambda cl: cl.fitness,
        reverse=True,
    )

    # Print top 10 reliable classifiers
    for cl in reliable_classifiers[:10]:
        print(f"{cl}, q: {cl.q:.2f}, fit: {cl.fitness:.2f}, exp: {cl.exp:.2f}")

    return population, metrics
For a full experiment you will probably want to run about 250k trials (the default of `perform_experiment`); a much smaller value is set below.
In [8]:
# Number of trials passed to every `perform_experiment` call below.
TRIALS = 5_000
In [9]:
%%time
# Run the explore/exploit experiment on the 6-bit multiplexer; keep the
# returned population (p6) and metrics (m6) for the cells below.
p6, m6 = perform_experiment(*get_6bit_mp_actors(), trials=TRIALS)
In [10]:
%%time
# Run the explore/exploit experiment on the 11-bit multiplexer; keep the
# returned population (p11) and metrics (m11) for the cells below.
p11, m11 = perform_experiment(*get_11bit_mp_actors(), trials=TRIALS)
In [ ]:
%%time
# Run the explore/exploit experiment on the 20-bit multiplexer; keep the
# returned population (p20) and metrics (m20) for the cells below.
p20, m20 = perform_experiment(*get_20bit_mp_actors(), trials=TRIALS)
In [ ]:
def parse_metrics(metrics):
    """Convert a sequence of metric mappings into a DataFrame.

    Only the 'trial', 'numerosity', 'reliable' and 'reward' entries of each
    mapping are read; 'trial' becomes the index of the returned frame and the
    remaining three become its columns, in that order.
    """
    columns = ['trial', 'numerosity', 'reliable', 'reward']
    rows = [[record[name] for name in columns] for record in metrics]
    return pd.DataFrame(rows, columns=columns).set_index('trial')
In [ ]:
# parse metrics to df
# One metrics DataFrame per experiment, in 6/11/20-bit order.
df6bit, df11bit, df20bit = map(parse_metrics, (m6, m11, m20))
In [ ]:
# Rolling mean of the 'reliable' metric, one line per multiplexer size.
window = 50

fig, ax = plt.subplots()
for _label, _frame in (('6-bit', df6bit), ('11-bit', df11bit), ('20-bit', df20bit)):
    _frame['reliable'].rolling(window=window).mean().plot(
        label=_label, linewidth=1.0, ax=ax)

ax.set_xlabel('Trial')
ax.set_ylabel('Reliable classifiers')
ax.set_title(f'Number of reliable classifiers for boolean MPX.\nResults averaged over {window} trials')
ax.legend()
plt.show()
In [ ]:
# Rolling mean of the 'reward' metric, one line per multiplexer size.
window = 250

fig, ax = plt.subplots()
for _label, _frame in (('6-bit', df6bit), ('11-bit', df11bit), ('20-bit', df20bit)):
    _frame['reward'].rolling(window=window).mean().plot(
        label=_label, linewidth=1.0, ax=ax)

# Dotted horizontal reference line at a reward of 1000.
ax.axhline(1000, c='black', linewidth=1.0, linestyle=':')

ax.set_xlabel('Trial')
ax.set_ylabel('Reward')
ax.set_title(f'Reward obtained.\nResults averaged over {window} trials')
ax.set_ylim([500, 1050])
ax.legend()
plt.show()